{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# AUC with Python\n",
    "    1.几何意义\n",
    "    2.概率意义--排序-》统计\n",
    "    3.概率意义--直方图-》统计"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import random\n",
    "import time\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_label_pred(n_sample):\n",
    "    \"\"\"\n",
    "    随机生成n个样本的标签和预测值\n",
    "    \"\"\"\n",
    "    labels = [0 for _ in range(n_sample//2)] + [1 for _ in range(n_sample//2)]\n",
    "    preds = [random.random()*0.7 for _ in range(n_sample//2)] + [(random.random() + 1)/2 for _ in range(n_sample//2)]\n",
    "\n",
    "\n",
    "    return labels,preds\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def timeit(func):\n",
    "    \"\"\"\n",
    "    装饰器，计算函数执行时间\n",
    "    \"\"\"\n",
    "    def wrapper(*args, **kwargs):\n",
    "        time_start = time.time()\n",
    "        result = func(*args, **kwargs)\n",
    "        time_end = time.time()\n",
    "        exec_time = time_end - time_start\n",
    "        print(\"{function} exec time: {time}s\".format(function=func.__name__,time=exec_time))\n",
    "        return result\n",
    "    return wrapper"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 法1 -- 面积法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "@timeit\n",
    "def area_auc(labels,preds):\n",
    "\n",
    "    data=pd.DataFrame(index=range(0,n_sample),columns=('probability','label'))\n",
    "    data['label']=np.array(labels)\n",
    "    data['probability']=np.array(preds)\n",
    "    \n",
    "    #计算 TN/FP/TP/FN\n",
    "    cm=np.arange(4).reshape(2,2)\n",
    "    cm[0,0]=len(data[data['label']==0][data['probability']<0.5]) #TN\n",
    "    cm[0,1]=len(data[data['label']==0][data['probability']>=0.5])#FP\n",
    "    cm[1,0]=len(data[data['label']==1][data['probability']<0.5]) #FN\n",
    "    cm[1,1]=len(data[data['label']==1][data['probability']>=0.5])#TP\n",
    "\n",
    "    #计算TPR,FPR\n",
    "    data.sort_values('probability',inplace=True,ascending=False)\n",
    "    TPRandFPR=pd.DataFrame(index=range(len(data)),columns=('TPR','FPR'))\n",
    "    for j in range(len(data)):\n",
    "        #以每一个概率为分类的阈值，统计此时正例和反例的数量，\n",
    "        data1=data.head(n=j+1)\n",
    "\n",
    "        FPR=len(data1[data1['label']==0])/float(len(data[data['label']==0]))  #假正例的数量/反例的数量\n",
    "        TPR=len(data1[data1['label']==1])/float(len(data[data['label']==1])) #真正例的数量/正例的数量\n",
    "\n",
    "        TPRandFPR.iloc[j]=[TPR,FPR]\n",
    "\n",
    "    # 计算AUC  ,计算小矩形的面积之和\n",
    "    auc = 0.\n",
    "    prev_x = 0\n",
    "    for x,y in zip(TPRandFPR.FPR,TPRandFPR.TPR):\n",
    "        if x != prev_x:\n",
    "            auc += (x - prev_x) * y\n",
    "            prev_x = x\n",
    "\n",
    "    return auc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 法二:概率意义--排序+统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "@timeit\n",
    "def naive_auc(labels,preds):\n",
    "    \"\"\"\n",
    "      先排序，然后统计有多少正负样本对满足：正样本预测值>负样本预测值, 再除以总的正负样本对个数\n",
    "     复杂度 O(NlogN), N为样本数\n",
    "    \"\"\"\n",
    "    n_pos = sum(labels)\n",
    "    n_neg = len(labels) - n_pos\n",
    "    total_pair = n_pos * n_neg\n",
    "    \n",
    "    labels_preds = zip(labels,preds)\n",
    "    labels_preds = sorted(labels_preds,key=lambda x:x[1]) #对概率值排序\n",
    "    accumulated_neg = 0 #统计负样本出现的个数\n",
    "    satisfied_pair = 0  #统计满足条件的样本对的个数\n",
    "    for i in range(len(labels_preds)):\n",
    "        if labels_preds[i][0] == 1:\n",
    "            satisfied_pair += accumulated_neg\n",
    "        else:\n",
    "            accumulated_neg += 1\n",
    "\n",
    "    return satisfied_pair / float(total_pair)"
   ]
  },
  {
   "attachments": {
    "image.png": {
     "image/png": "iVBORw0KGgoAAAANSUhEUgAAAecAAACfCAYAAADDCxFMAAAgAElEQVR4Ae2dT4gk2Z3fvykNumhtLzLrncsOK5Hp3pIp0WjwQNTCCMEgyKRBOcy6jnXbrMMsJvPQxx4j9bEPmRjNIevgQ8GC3fYwKdNk4hmB0BiqUK9H07iMinIm0k7vRchY2CBsVos3zIu/L168iHgRGZERkfktKDLixfvze5/fi/eL979j27YN/pEACZAACZAACTSGwBcaIwkFIQESIAESIAEScAjQOLMgkAAJkAAJkEDDCNA4N0whFIcESIAESIAEaJxZBkhgbwiscN7poNPp4Hy1N5liRkjgIAnQOB+k2pnpvSKwmeHEMcoD3EzXEHM85/29yiEzQwIHR+CVg8sxM0wCe0RgMztBb3INjJawaZH3SLPMyqETKK3l/FEOkoscfumVBMol8KLc6OqMbTPD2eQalmgt0zDXqQmmTQKlE+hwnXPpTBkhCeyEwOq8g8HNFOurMbo7SZGJkAAJ7IpAaS3nPO2RPH53BYLpHAqBfSl9KywuAOv0AQ3zoRRd5vOgCJRmnO/nwJbHb45o6ZUEDAjsSenb3OEGwPE9tpkNlE4vJNA6AqUZ59blnAKTQJsJrG9xDQtHvTZngrKTAAkkEaBxTiJDdxLYVwKrc2ct9MlsU10OvTTEmmuTf67Lrk4V2pipHy2WJjnSODdJG5SFBAwJbO5Ep3aev3CDks7gIk/AYn77c2e9tb0cSeEtTNe26257v+spLAAXA2HEz8G9UyRcVV5SP1XSLSVuGudSMDISEqiDwDGMhpydTUoG2IFJjkPoDxGYZ+sUD9Qh8u4YV56BBi4wOJmhwvZ8XL5Dd6F+GlsCaJwbqxoKRgIlERAG0G+pRlqyJcWfFo03cU14SZxZ3n2AU9F8Fn/XT/GM1tmDsYMf6mcHkIslQeNcjBtDkUApBJ6/LCUa80h6R043snmALX06E9dEHBZOY81mP+4u7h3719e4XfvX+/y7wezkBFUO+xvRo36MMNXhica5DupMkwQOhMBKLMZ2/tK64DfIPYS+F/yuMemJsfb6jDT109yCROPcXN1QMhJoOQF3oxQnE6Mhks/iWOP22s/qCMNkj76nPfjtYnxlw50w5xvpXU+Io36aXJBonJusHcpGAm0msFoEk9BGKRZ3dR5OVrOmD1OMeJthJMgemTV9gYGz9GxHRpr6SVBKM5xpnJuhB0rRMAK7Ggp+47WGZbxEccLlXimt4dU5gpVdoyWuxup07hIFanJUNRhp6qfJBQKgcW62fihdTQR2ZTOzJ4S1deXvBs+een3V2i5tb921Z5l5spZX0HdmpKmfmqoW42RpnI1R0eMhEdhVyzmbaUsHYDfP4NtmXAw0u4S5XdmjpbsZycG2mJMKgGek11Nni5byu7upnyTyjXGncW6MKihIkwjsquXcpDyXKcvm2VO47WbNrmD+mmvbBo+hTqfeHV85O6qVbaSpn3TuTXhK49wELVAGEtgrAlKXKdKWUO1VpivNTNxIbzNpjPqpVFklRf5KSfEwGhIggQIE9nNCmLQ0SjveXADUoQcRB1X4M+esKdZX4y3O8aZ+2lCc2HJukJZODLcLetEgmZssiinPJuehlbIZLtFpZd52LbR/epQwzMIoiyGBrQwzAOpn11oslJ5inD8tFAkDlUPAeFIMrbMRcGOeRrHRkymBcNepGs6bXs3MtsQ09SdlejOb7e7ULNkoY4RlGUbZywv1Iym1wZeKcX69waLuv2gnhi3nX+8/ilJyaMqzlMQYiUdA2nVKdwpVGZwcw5U05nqLSU9+5i7ZOlHerdViovgzEewpBlVvtakzyva8xI1ZqB8TTTfBj2Kcm7OApAlwdi2DaUvv93ctWEvTM+XZ0uw1U2ypyzTxFCpF8s3sRLPUSuw5nfC/GMK2h1icJ6wBj4xz9zG3lzie9HASGOgVFjeiizi/0bOml6hkn5TKjbIHnfpRSl9zbxXjzAUkzVVVKNmXwktekUBhAteJxz+JE5N8w5jvUAa5yzT5FKqoyP5MZFtaYuVeLzHyu3TlZ876qz7mxuuw+njorBd2093MFji63GZCVVR+YIPVquA5l7syyp7I1I+quwbf2/xrHYHPWicxBS6bwHpq2cDIXuaM2A0HGxD/+vBRP8n+1KTVcKO8wqkR2kt7lCBjxOt6altOfvx8+b+WPRoJTv697teyp+swNjUP6WGj8VlyRGGUCVdre2r54aMyJATY2lnNG/WzNdJKIxAL3PlHAiTQMgJuRas3rvGsyIbANwjR30hFrTF2keeRBLLjzme05MgNjbMUxOESCLu2l0vJ8mqNvepHiky69A2bNV0qcUqecl0KbrswytRPLrU0yLPSrV28iZ9nAvG/L54MQ5LAlgQ+2jJ8G4N7xxPKXcPKdaSHuDvGVfBcdC2n/WXHbTr274495+tGVyVbh2dPAuii3886SMPEj7tph2WJrTR76N09K2HWtuB2Vc34dQQK9RPB0aKb0ozz/RyZ/hc5/B6S105HnmV6SDmvJq96nt+pJrF9jXVzhxuknCpVcr4zJ1xtZjhJfE+8mcjOXt7uuxSdbCb28/aPZfTG1JMmlUn52szOMDle4vLUdeyO7+EumFwmeTyAS+pnd0ouzTjnETlPKztPvG33axeYPdr2PFcpv57nfpS+v/7JfwXwCf6qSoDYYHY2AWo9Y1kxpmfAZdJM7dUCN6J1O1rCXgKDzjnW3t7UiRPMhshoBa/wZHKMZaRroY8HOIOBXa9UO82InPqpSg+1GOc8reyqMt7EePUtvSZK2g6Z9Dz3o/T98be+AeBN/PPKVCFmbPecFqNpt3Q1ongbcPjd7M7uWPqZ2qvFDU5Pj10x+g8xnQ6z1wf3+yl+BIMBsIwvueqOHwED9nRBnU1P/ZT2GtRinEuTfs8i0rf09iyTO8wOeRaD7XYFO01U2JEWY7H4dhJqM8Pi6BLje35qXYzHWcdtukvGklrAojv76ek64eSsPuZO63y7MXJf2r3/pX5yq5jGOTcyBiCB/SbgrjvexWSl8jiuntxiqOwOEh1vFmPM6piz+wEyXMRbwCLsGS6R1muwwhDrKTDpddBJsvDlZbHVMVE/+dXHU6nyM2MIEiCBJhHYzHA3nGOsyOR8ZEQcxVaeCwzVuR3zeSSkb5gvcYZOxz2VOvTQQ2cS3rnj2wt0BgN0LkQXfLwLXPJ9mJfUTyG90zgXwsZAJEACjSHQHZe0JMkdZxdd2W6L+Qq2ZNx9ox1vTfchhlD4l0CA+kkAk+7Mbu10PnxKAiTQNALOlpfVjPU+uLRTu7KbhqKR8lA/paiFxrkUjIyEBIoReH7QZ824E7L8Ay7EGK/blezv6y26oIdYqAdgOAdfXAJPZii4o3WCsrroZuxZkrwfeUKUrXamfupUH7u166TPtEngoAm4u1dJPccAol3JAk/ftqHrNB7PoyF9lNZRz7+M/0ZOrIo/znJJjTsrcOueUz91qqwjthKtUwCmTQIkkJ+AGP/sic0xOAEpPzyGIIEWEGC3dguURBEPl8DqvHO4mWfOSeCACdA4H7DymfVkAk0ZCu7P2bGVrCU+IYH9JUDjvL+6Zc62IPDaFmHzBD3sCWF5SNEvCRwWARrnw9I3c0sCJEACJNACAjTOLVASRdxfAm/sqom+vwiZMxLYSwI0znupVmaKBEiABEigzQRonNusPcpOAiRAAiSwlwRonBuk1pOZ2X5HLxokc5NFMeXZ5DxQNhIggcMkoBjnpiwgOUxlxDfUT+BA65wAJupszDMajHckQAIkUDsBxThzdkrtGqEAJEACJEACB09AMc4Hz6MdAO63Q0xKSQIkQAIkUIwAjXMxbrWGom2uFT8TJwESIIHKCZRmnD/KIWoevzmipVcSMCDA0mcAiV5IgARqJlCacf4nOTKSx2+OaOmVBAwIsPQZQKIXEiCBmgmUZpzz5OPXeTzTLwmUSoClr1ScjIwESKASAqUZZ1Z52+un0znHavtoGINHYK95/vwXAH6ID82WxrNMkAAJtIzAKy2Td6/Fte35Xudv15kjz10TZ3okQAJlESit5fydHBLl8ZsjWnolAQMCLH0GkOiFBEigZgKGxnmF804HncR/w+7YzQwnpl23q/OU9OKynLM/OLkokaXERpTlExjulCqF26dL9X02fH+NEajxR9/Xk7Lhs3wba6Y2j9RRfvR2rr+lPQJs+P/W1F4bhl+ORLiRvTT0H3hbjsL0YNlTNcH11LZ8eYrEHyR0ABdk6SrZLzOj3KWxMYVk/ed/aAN/aP+5+j5kSLieWu77JOXdd7NiL1dGZAmP/fiCeiJ4PwvWAQnpxJxZvmNIGudAHRmrBMY+PY+ukRUvWdRQfpYYkWfQJUOe7FcXifRBIMUR8elXtqISSPITCXCoN2Rp237pW9tTq2JjUWExK2KcA6MpGWZfRP+91jzyvRj+SmUsYpQFa9jYPoEUOaS0k+oB1hUp/HbxiDoypZzTOPsVmulL5iki6UUxkVJ6mZK/7CW5lI8GkyQOxg9ZKqr2K4oCPTpKTEVvP/4vxULmNs6S7rX2MWjRbMfC+QDY5n0vhsMNJeWRdcU2ICsMSx0ZwzUcc/a6yzfP8PTavR4N+xl96BvMTga4gIXp5RjdDN+Jj9e3cJO0cPogKZYu7h37MVzjdu1f8zdCgCwjOIA+5uspLFxgcDJDHauS/t//VkQyvO2+/V0AX8PXDf2vnky892gE7avbH2LkxHWBx4XHhFd4MrnG6NEW77thfrTeWL61WBrlSB0ZqyOXcd48exoYyqNeehqb2RkmwqqOHmGcZFPTo3CerhYXnq9j3EuMZ4O7G4PIDtwLWWoKQHeMR8IqXU9wVtgoaeI1dPriPzL0qHjbOAU+7Z2QA6wQvEajIfSf1T0cWW6Y68mTQuvtN7PHEG/rxUBMANv9pDuWb1nnzbymjsz1ksM4b/DMbzZbp0hsxDppu1/Q4jK7hZ0mrEmlIsKvceu16IGElkFaMgfxjCyT1Nwfum3GokYpKV4T96yW8/ePOtpo1qLAW0fI+EZ2w64WjtHURhQ4yr1PN7jL3Y0QvvNulNeY9NxZ2rtZScHyHaiysRfUUS7VGHeA2/74HOzk8Rw3tmDiybazp4NxMNjacTJPeH8yi5hwkiWbeX73zCdZpig0XrYLDgWnpKF/tPqPenffVZTt+J83xyLtpZAChe9j+vsh+zOMOkhFDqudpZ03wiBmwwuWb0NQNXqjjnLBN285S1/fx8n9ywCkFnZiF5rZ94PbdSf8prSGV+cY+D3foyWutulDNxOrlb7IMk1tfXiNZ/it5z9I8173s9UTZ8jItFfKaWV7Mqe/u2HGbvI2nR9ciq8I73/pjV+H8eFigE6FTWiWb4l1Qy+po5yKMTXlYes0Yzan0Ww8k1SlGdjar+6wtcMWcxZPsswk5K//9Wb7f54VQDyXWgLa1qJuKVHEzbJHl+kJfe9P1JazV+5zzIgO3930HqhI61f7zqXLqnsqpy0YlRStkhTLtwIkfltCWd1uGTx1FFdKuothy9l0rAAIJ43l/EpQvUszw52v7tjuZGImOMSySedrnS1mFaB0T5YSDP1lN5juf42nzzb4H3pvpbt+4R/kiNLZYW+AC2uK9VVNM6JziCu89uc21lNvppmYLPa4glnxLN85tVKDd+ooN3Qz4yx1aVsZ07SLdKHppA6NvIXp2u8ui//O9VNPdVEerBtZ5lP9telavP5c6sqNl82wmzfp2RW++8V02d67FXt3eNth9ibAdA27JYbZz1l3fIWlO+cOuH6KZ7knm/kx6X9ZvvVcIq4llNVtRgypo4g2jG6MjHM4VpC21tgoPUNP0rg1TJeLGEZ9cN7IsojKXy8SqEiYv00PtDoXM54XGHrjubvqIcr6CE+XOv60P/fHocveh4DlO067aS7UURGNGBhnCWzmEqoiIujCSEujtpxUpou9qW6/9/aPjET7yMiX7+kwWYrcm/J0SPWOEHS+3tztbEOSV77q60n/K7qF7fURHnvDOkXmVPX8BcwA0iZ6ldXrlZCTYNKd/nlR18Mt30WJ7T4cdVSEuWKcP43HIY0VWKcPiu/0FY852UXqRjedkZocWXuefONbf2wk7N/nsc4HylKANOUZg358z6yc5zxpJ36q2wn+W8YmJKLljO4YV6LlvJ7iRmzwkXM3s3A8PZbTBAcLGaNXCeFMnEuO+4DLtwntwE8JZbXwHj3UUaCGPBeKcY4vIAnHCgDTZRh5BND5DXeRKflF1iWmuq1mZscJmvqT4t/MZoV2XpKicC//T8wl0aEcliusih7JacrJ1J+U09J4ijiDbQXdBF5K6dR5KVrOwZ8w0mK70esJenma0MHWnEDyeLq0y16VPWQlx11O+Q4I57swLbOm/qTUSy3bUrx1XBbW0YvneB68iCl1UMSfJoem/E39SUlUqqfoZO74ApJwKUTGEiovInk5RrENQaQlUjmWi0TzkXHnLCtIyE/smSuPmheXS0IcCcm7bKKneclevzz8WL5NvP7gPyU+Uh6Uw9LXaaFlMC3g6UCTlpqoulaglnr70/grlxl/EX1kv8dhWakm//r3KDOzqR5CmSs7jS5WfiWBYs/0eayirpCkaPhlOToqUuYDMC3Vk7KIUt0XSQJrWjNL65wLHQ9XoJL0FWe81tTJy9Ie6fIk0o+5qy/d0h4V+HAQcqZVfKbG+erDoNilXxRgGYtQ1mdkja7h8Zwt4CnyHJah5I+nGJsSHIoYZ9vfrS9WTlMECvSYkL+grOT74ExJUXrkrXEt8M5IkcQvA5nTdz6TA4Z69o6wVMu0et/QukLOU6OvC+golp+g7Gp0ZlKmaqyDYnnJ4aB0a0vtdbHXl7eRvXDNGvsNhkG7D3Dqz6xJmFgT+I0m59zJXSDJp1BFA4qlGvplK2KG6AjLYOcib0mLs/6qj7nxOqw+HkprNTezBY62OWkrKr6zq9pqtcFvP3wr9kTsuDY7cfco9g8T+OLfabxpnIqwjEazwexsgmNvLXnAWHStbrXWtk6e+tIXToba7eqAN16LEje783Y0S3i/tHF0x7h0yvA1Jk/UMYoNZo/dbfZGy7n+YAxpzDLeo+4t9eqca4ZtxLMeJtcjLLVLwOLlWyu/xrFI+d6XukKDo5FORXQUzUi766BoXnLeJRpy6Ysn7w5c4ddpvq/wMJz7hZSnYaDPh2jxGsiQ+GVm2aOR5R4Sr35RB/fRloiaB+PWfMK+4PH4Rvb3Dbq11XBFWIo44i39tT0dTe21Hrjr2mCeerHDHqJ4fvUh6nZ19WtQthVB/e5tOZ86t2gwaXcnUe7V1opSV8TKvOpfilwtpzB5XyM9HawrJJyNulR1a1QHff7TSB7y1kFOT5SIY8s66L0PwvEmNR+x8h3YgnjLXn7PIhkzuFG6tZWXUJdoyosWphdWdjqFRG1LdprFM2honEPB3e7NQOi1vVzKZkgXn+pHiky69BVsTZdKnJIn3aWmkPWXOo/CrTyWzksxGtlTJa31dGTL2/gJfwEujVhOvgMPKqsaeNrR0ueIHDDOb+w0Wd6Jk1ueoh+GxgmrxtTknZbCBOqUE5Seh5WXAc+AfVixaeN30iqvfMuiu9e6shj3Jbs0r2zL0tV1XZ6OBF8rZx302WehUfUJNFVPn33mS6j/VYyz3lMRV7fygGb81rYzZCqSnBPGTVOusPK/cE4rIrF2yB+fmxm3wFqW2xJdT6e2YvMcb8DX7O+l5l6kD/sdXeDUcPkeOi+FsMCailN8XEytsCJ1KuJEXmL7aX0ZcCWqg2e89GW3HPPx24lvxxjKZX0nqVaciFu+U4pTaem3va4oDURDI9qXOigTr2ehI4bau1GMc/yrIzPyFA9+pae+bPHqMSWSHI8ChQZhNJW/Y3CSvujdysH98nf9uC+xYozkHgU1c0Ha4YUTx2jptMrdXoClPZWbn57XzC49T/YfVAXQ6y4MeipEepFWlWj1h/kSV0LHyQiayFOB53+ARPIZzWMj7/bROKe+m+Vqoe11Rbk0mhVbRDctr4OyyH7u9MN/pm2wKhPCCs1OSRzl7s/XEHNQLgbRiSK/TgxRxYMLDORDM86AS3uIRXxWC7Ba4MayxGkasJfAoHOOdWSymWaC2VDsfJz2Jw6hP8YyMvmsjwc4g06E5JjciRGYPsSfJnva+omYMBNsEdm9h2MlxsWggxPT3QgayVMufS7TazFpUDtZSck8byskEJbv+rbL35e6okI17SDqbeqgF6p8jayDQiF/86XXgJe/w1dCp+BKMc6Be0kXXYyvhIEWhT5qoMtJwJ3pmR6XMlvbqYT1M7VXixucnnrmqP8Q0+lQP3NVTrDfT/Ej5BsAmhmw3fEjQPloAT7BX8lxB9cinh4mx9551feDB7u92NzhBiM8MtwBv7k8XWyrc28WsZ0wQ3m3dA84NaV810aiTXVFbZDqTTijDlKrxvrroDRcL4D7wMtfvYpIs/jlcydQxcZZpCEMtI3lSBjoE2f3re+kyVvXs80Mi6NLjO/5AnQxHmd9w7sfB0kt4M3sDE9P14g0mv3o0cfcaZ27TAJn5WIzO0Gn4zT3YXsRqQVQCVLerfMiKNFZR+gpTtrbhvIEROlzl/4MbqZY0zBr1bcrR1353lXahdNpbNkunKPmBsxZB72Qm85N15Njm1/iV69Kpvnlc7x87Q1HHzswzq7enQ387Ue47VXRgt6+bK2e3GKotAjdisNfYyx+xRnScteXazSHi3ieRNgzXIZdxBoRVxhiPQUmvQ465yvY9i3eU/y56zKvoIim+NrRrdji0nDf6abydA3zYxyJY0jZlb2jgpOcTKPKd7KYkSdNKNsRgQ7pJrMOegHfPm+jp2+UVKfLqnmubLHw8nduZ/arsidhmP09S7MGrJv9/GN7+OVwTltkIoEjuGZCmC5D62m4ZEhMtEmc4WQWny+H+A2XlSRMKhNpBctQkiaq6YTegZsyGSN98pckD3lKMNIvP1Y35Uv3Hj7dxwlhYe4qv/Lf0TAhs3dbrGAIlheyrgjxlXWlrHN2VoxIkzWN6iBvnfN2eopO0vLLy1Z1+ucf2+Hr/rntzgVTJqhKHF+RjfaursWXzc66Zk0y1R2X1DJ1x85EV7Y7qeoK9jgUILk13Ydtz0OPTbkSBy1cecKszuF0A8/FznEn6E2uxb5xWOq6hRvNs3Glr5C2Nx/+0An380KhGagwgUaX7cK5ak5Ar0s3EKhIHeTE8eqWdfp93Ic4deN3znyfMur0j/478I+DHuzf4EuvvYaXv/pKdLw5yDiws25tKc0SDfNb+PC30qk9ciJp185WhOljvWnB0549uLRTu7LTwnYqmTSXlqLZM6d7//ER1l43sNsVKbZCHWLhzYQ/MZ3BbZZk4Kt8no36LMRbrwdZzXfx9a8B+C7e7uYLRt85CbCuyAmsGu+56iBvQlVZkhSvg8Iudnz6I3zh799C8LrrxpsVgWsxzooMW9y6E7L8c3LFGO8lzuDfdzoLDCUDErgvhrDtS+DJDJstUo8H7aKbUVkmH9kHp/WcNQUtnmZ1Ls4L0ek4Y+f68dk+5mLf8uUI15Mzs6M2c4m7XzxzZd3z/P2jTpFgDBMjwLoihqRxDi+D4VZftLx10L8SFbrTcg6aqH5UBX+3qYNE6xtwJqn9wT/EPw1mQr/E868407QhzwVTBaylW1sVovi9OxNc6jkGEO1KFnH3bRu6TuPxPBrSl8NKO2l+ZLC8yo9I85sat8b/zp1ES2HgHoJgTdew7YyvDSFgfw47pQMjNc8N5fkpEH7l7lwJYYLv3aaADb3xKpMA64pMRLV7eA3OQTB7VQe9xP374kPhDaf7+ub5Sxy/4eXTcRMfJP59VAEdMf4cdWrTnTvGK5Zq8Y8E2kjgR5+iUNe2aFH0xOY2ujH/NoKoXGbWFZUj3jqBZENlEvXzF8/x6n3XCJr4r8OPGMU2bdPvRcu5DshMkwRIoE0E3JZzmyQ+PFn1LUhTDm/cd9cHm/qvw5+pYRaytXzMuQ68TJMEyiNQeEJYeSIwJhIggQYSoHFuoFIoEgmQAAmQwGEToHE+bP0z9yRAAiRAAg0kQOPcIKX83ts/MpJG2QXOKMwhejLleYhsmGcSIIFmE1CMs1hAwr+6CPz2w7eMkv59WmcjTqY8jSKjJxIgARLYIQHFOAf7l+xQBCZFAiRAAiRAAiQgE1CMs/yI100l8L+CnWaaKiHlIgESIAES2IYAjfM29GoKS9tcE3gmSwIkQAI7IlCacfbP0DSRO49fk/johwTMCbD0mbOiTxIggboIlGac85z1k8dvXWCY7r4SYOnbV80yXySwTwRKM855oHCycR5a9FsuAZa+cnkyNhIggSoI1GKcOWaqV2Wnc4Tv6x/RtQABPU+WvgIoGYQESGDHBFp+8MWOaVWcnG3fVpzCYUVPnoelb+aWBPaJQC0t530CyLyQwD4SWO1jppgnEmgRgZYbZ3FGa6dFuCkqCbSDwIebdshJKUlgXwm03DjzjNZ9LZjMV70E/vNP6k2fqZPAoRNouXE+dPUx/yRQDYG//phN52rIMlYSMCNA42zGib5IoBICz19WEm1GpO/jzzoddIL/P8P7Soj/+2+7isuWt6tzKT05bf31OQe9twSeMzj1kxNY9d5pnKtnzBRIoDEEPpm8jk7nL/DBOz+AbdvO/0/Gv8RfdDp4ffJJdXL25256y5GUhoXp2pXBl8VeT2EBuBgIo30O2mgJV5WX1E+VdAvFTeNcCBsDkUA5BN54rZx4TGIRhvlbs58BwjD/h3eDIG9OP8UP3gF+NvsWqmixRjoH+kME5tk6xQO1gd4d48oz0MAFBiczVPjJEDA45Avqp5nap3Fupl4oFQk4BFbnCasRfv4LAD+E8azqTyaYCMMM4J1vh4bZx/zut99xLi8G5bdWfyXX/ps73HiJWqcPoNpm51H3AU5F81n8XT/Fh7TOHoxqfqifarhuGyuN87YEGX4vCT0qReAAAAr5SURBVMj2pM4M9ud2Kcm//69n8EwzNLYZePfb8MwzHs/KnQz2pd9IWVjf4tq5tXAaazb7/rq4d+xfX+NvfGvuO+3pb/H5B2JJ6QmKqo36aWaBonFupl4oVc0EdtXbXLxCzgPoffz4A8//O99GvN0snh3jj77p+rmePKlsrHe1uPAEOcY9bbNZPN7g7kAMsgej4I+7z0On08PE/eIpGE8YjPoJWdR9ReNctwaYPglUTeD9H8O3zclJvYnuV/2nN7grs/EcHAS2QmCbR0P0/eRiv2vcBsZmpG/px8K03yHP/AMx3BEY5dEStn2FceLHTgYb6icDUD2PaZzr4c5UScAhkKdCLorsk80vg6Df/KOgvzhw8y+O/aYzrnG79l23/w3r/gX8dvNomGyaV+eDwJ81fZjQ0t9erjbG4BrlDgYCpGOUbdjzZJYmeaR+TCjt3g+N8+6ZM0US2CmBm79xR5tFol/tvmmU9k2pTWc3yU3QVz1Com1enbuGRwQZLXFVuDlolM3WeKrCKKuZp35UIvXe0zjXy5+pk0AhAt23vwvga/h6odDZga7LbDo7yW3w7KnXV63t0l7hXGyK4jQJAWu63rpFmJ3L5vvYzE6czVscLNYUa7E2fcuWsj7X1I+eS32uNM71sY+lfGI43fJFLCQddARMeerC0q1kAptn8G0zLgaa3cLcruzR0t2U5NBbzL5R7omZXr5Rvhrrl56VoSrqpwyKpcahGOemLCApNY+ticy4QqJ1NtKpMU+j2OhpGwKbZ0+DJVSxXcG8ncrELmGVNAq3EXzHYXdulL38UT87VrRBcq9E/exqAUk0Vd6RAAk0i4B11CtRIKnLFGlLqEpMsoVRiXHloPu6ylZyjA31E0PSAAel5dwAiShCNoFgemW2V/oggXAWNvDLTfJ2W/LEsePkRcgFgEpLo7TjzQWi3MMgYsOZ9dQCrifoifH3kxnKXNGWjIz6SWZT3xMa5/rYF06ZtrkwuoMM+Ga4gNkw/xZKbTivzJZQGQq319664yvngJCdGmnqp5Flisa5kWqhUCRQIoFga07gZ4l7YX6CYDm07kCKLcQJd50q2eibyLSamW1raepPSnMzm1W2k5pvpJ1DvPyWdBWnkgAoRz8rrIoeIWbK3tTfjnQkJVPJJY1zJVgZKQk0icC78M61AD74cezsZlfSG/jLoRMPpCiUJWlXsJKNfiCOcxZx0oEdt5j05Gfukq0TZWXEajFR/AWxp1w8xaBTfE/rlIiDR6KrW0yUc4y0P8u9VCNdjn42s8cYDDoFTzVrt44CZZV8UYtx5mTjkrXI6HIQOMzS9+6/HMPdOvuX0A47B1t8jvCozI0/pC5TU6Pvz1juiHFXk//FELY9xCLJaEXGufuY20scT3o4CQz0CosbsYZ4nrKlqL6IWdPL4ttm6qPUulZmpAvoJybgZoYzb3Nv9xxuSW+m4+Z7oKMYly0dajHOHDPVa42Hy+u5FHXV8zzQ0vfmFP9OTDbCzzD7S7X/8RNM/o27+/ZoqTNQ/gELotLN11KUu0yTT6GKatjv0hUtxuj/EiOMsFTdnfVXfcyN12H18dBh4aa7mS1wdFnmGuINVqtqpnLpjPRJ8JER5WhyV0Q/0Xg3mJ1NcOytTw/0Jc7kFuuzC886b6+Oony2uLP5RwIk0D4Cy5ENWPZ0nU/05QjiDEr7n41/EgT8wTuu2zclt+ChbdvrqeWEEeHc/5G9lD0kXKvhRiaBEuJynZf2CAZpr6e2Fcjqyyx+LXs0UvMiP3f9+Eyt6VqTd9V/8r0In+fvp5/n8S38ru2p5aefvyyUoR8RRzyfa3s6mtqpuW+Ijr48/Dgv9J35F1+m/CMBEmgbgYLG2cmmE9av1GHjm2M7NNUaEJqKNNnQygZDSkMylvHKXJOm1snQOEthHQMUCLu2l0vZZOjiU/1IkUmXvmGzpkslTslTzsv8xtlPQDA3Nc7l6ccxzKORPVU+utbTUeSjUfgLVOCLLP22SUeS2JVf1tKtvUVDXwnqdrcpjrwlARJII9Cf47PPpC7jT6dQj8OI9JR2x7gKupJF1zLwP3+RlEAX4ysp7iBc6Ga6c5s79pyvG12Vah2ePQmgi34/61xFEz/uph2WJYYJeujdPStl1nbxE8oEc9MjI8vRj9DNGS5x9fAITwfSGHOngzMMgbPQTWxBerFQh1JCTbVJR6HU1V+13Di7Ba16TEyBBPaMQMbQ+4MkG7a5ww12d8Zy5oSrzQwnHXk2tqwnbyayM8vZ9ROdbCb2877AQJ50ljSpTIp2MzvD5HiJy1PXsTu+h7vI14zkeQ8vA8PsTxz09/52PsSWeHSvH/lAEzPNk48IpY6SikjLjXNStuhOAiSQRiDDNiccsOBO/kGtZywrxvQMuEyaqb1a4Ea0bsW5x0tg0DnH2tvkw524pJlgNkRGK3iFJ5NjLCOTz/p4gLOCy4jStNTMZ2LCXtD70b0H9YTwxaAjzYTPyAN1lAhI2Vs70R8fkAAJHDQBMYTUc1qMtt9iqoWHmK0dn1E+n8eFWS1ucHp6jOtbAP2HmE7X2Uul+v0UP4LBAFjajh95PnZ3/AgQLXiNbHHJ9tjF61kxXY5HHSWXBback9nwCQmQAAC3K9hpolZ0lnAFmDczLI4uMb7nx93FeNz3bxJ+3TksST3bojv76ek64eSsPuZO63y7MfIEwZrr7BhjRTzrCEbHplBHCrjoLY1zlAfvSIAEFALuumPTCUdK4JpuV09uMVRa+NHxZjFhSR1zdj9Ahov4GHZsnFWTrxWGWE+BSa+DTpKF14TbK6f1La6P7yUMi0RzSh1Feah37NZWifCeBFpD4Bq3a2cScmsk3omgmxnuhnOMlcScj4yIo9jKc4Gh2hXt9ZGLTWxEF3rPm5l8iTN0OtdKrD10JpKTM769QGcwQOfiT/A9+xbvSY/3/VJsajIaRscYxMYpsT4L6iizKLDlnImIHkiggQR6RxALeW7u5JHPBspZh0jdMTJ7sA3ksu2HuDsRS4MunQlQ6s5l4uQoa7qO7mImJor1557bARhmsczO3wVsdY7BzRQP+/5QiOidiPdCOOipo8wSSOOciYgeSKCBBLxZstdO07mB8u1SJOfgi2rGeh9c2uHM5F3mqWVpOUMGj4+C7TrDD5khFt5StZOKlpvtq45onFv2ElBcEnAJ9DEUu4FcLDKW/uyS17abArnh/cMuRIvV7Ur2N7QQXdBhZe/76zgHX1wCT2Yotx+hi27Sem8P66F/HPnj+EJXtt+CjhQ5cdCIDXs5wvXkzOz4zkj4rJv91RHHnLN0z+ck0FAC/YdTWBcTDM6HDZlFve2mQG74yLAwrmBHHdC3bURHNV0FjeeKR09v1lHK3OHIaUj5FZ0ad/7o2hFC9FQMLhxZ3W79jC8Y4dPp6k/OXirHA9VRR2wQmoyMT0iABBpNwK8oxUSkyMYYjZaawpEACWQQYLd2BiA+JoFGExAtEnE8n7NF5QEv4Wm0kigcCeQnwJZzfmYMQQINJSCWBom1u2LHSjths4yGik6xSIAEIgRonCM4eEMCJEACJEAC9RNgt3b9OqAEJEACJEACJBAhQOMcwcEbEiABEiABEqifAI1z/TqgBCRAAiRAAiQQIUDjHMHBGxIgARIgARKonwCNc/06oAQkQAIkQAIkECFA4xzBwRsSIAESIAESqJ8AjXP9OqAEJEACJEACJBAhQOMcwcEbEiABEiABEqifAI1z/TqgBCRAAiRAAiQQIfD/AUO8eDbwxkNhAAAAAElFTkSuQmCC"
    }
   },
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 法三 -- 概率意义--直方图+统计\n",
    "![image.png](attachment:image.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "@timeit\n",
    "def approximate_auc(labels,preds,n_bins=100):\n",
    "    \"\"\"\n",
    "    近似方法，将预测值分桶(n_bins)，对正负样本分别构建直方图，再统计满足条件的正负样本对\n",
    "    复杂度 O(N)\n",
    "    这种方法有什么缺点？怎么分桶？一般100-1000效果都不错\n",
    "    \n",
    "    \"\"\"\n",
    "    n_pos = sum(labels)\n",
    "    n_neg = len(labels) - n_pos\n",
    "    total_pair = n_pos * n_neg\n",
    "    \n",
    "    pos_histogram = [0 for _ in range(n_bins)]\n",
    "    neg_histogram = [0 for _ in range(n_bins)]\n",
    "    bin_width = 1.0 / n_bins\n",
    "    for i in range(len(labels)):\n",
    "        nth_bin = int(preds[i]/bin_width)\n",
    "        if labels[i]==1:\n",
    "            pos_histogram[nth_bin] += 1\n",
    "        else:\n",
    "            neg_histogram[nth_bin] += 1\n",
    "    \n",
    "    accumulated_neg = 0\n",
    "    satisfied_pair = 0\n",
    "    for i in range(n_bins):\n",
    "        satisfied_pair += (pos_histogram[i]*accumulated_neg + pos_histogram[i]*neg_histogram[i]*0.5)\n",
    "        accumulated_neg += neg_histogram[i]\n",
    "    \n",
    "    return satisfied_pair / float(total_pair)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Zhao\\Miniconda3\\lib\\site-packages\\ipykernel_launcher.py:9: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
      "  if __name__ == '__main__':\n",
      "C:\\Users\\Zhao\\Miniconda3\\lib\\site-packages\\ipykernel_launcher.py:10: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
      "  # Remove the CWD from sys.path while we load stuff.\n",
      "C:\\Users\\Zhao\\Miniconda3\\lib\\site-packages\\ipykernel_launcher.py:11: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
      "  # This is added back by InteractiveShellApp.init_path()\n",
      "C:\\Users\\Zhao\\Miniconda3\\lib\\site-packages\\ipykernel_launcher.py:12: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
      "  if sys.path[0] == '':\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "area_auc exec time: 41.72503924369812s\n",
      "naive_auc exec time: 0.00701904296875s\n",
      "approximate_auc exec time: 0.004010915756225586s\n",
      "area auc result:0.945862\n",
      "naive auc result:0.945862\n",
      "approximate auc result:0.945906\n"
     ]
    }
   ],
   "source": [
    "if __name__ == \"__main__\":\n",
    "    n_sample = 10000\n",
    "    labels,preds = gen_label_pred(n_sample)\n",
    "    area_auc_rst = area_auc(labels,preds)\n",
    "    naive_auc_rst = naive_auc(labels,preds)\n",
    "    approximate_auc_rst = approximate_auc(labels,preds)\n",
    "    print (\"area auc result:%f\"%area_auc_rst)\n",
    "    print (\"naive auc result:%f\"%naive_auc_rst)\n",
    "    print (\"approximate auc result:%f\"%approximate_auc_rst)\n",
    "\n",
    "    \"\"\"\n",
    "    area_auc exec time: 41.72503924369812s\n",
    "    naive_auc exec time: 0.00701904296875s\n",
    "    approximate_auc exec time: 0.004010915756225586s\n",
    "    area auc result:0.945862\n",
    "    naive auc result:0.945862\n",
    "    approximate auc result:0.945906\n",
    "    \"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
